start_time <- Sys.time()
#### #### Load Objects & Functions #### ####
######################################################
# Import functions
root = "./"
source(file.path(root,"MAIN.R"))
import_parameters(params)## **** __Utilized Cores__ **** = 2$subsetGenes
## [1] "protein_coding"
##
## $subsetCells
## [1] 500
##
## $resolution
## [1] 0.6
##
## $resultsPath
## [1] "./Results"
##
## $nCores
## [1] 2
##
## $perplexity
## [1] 30
######################################################
#### #### PACKAGES #### ####
######################################################
library(monocle3)
paste("monocle3", packageVersion("monocle3")) ## [1] "monocle3 0.1.2"
library(cowplot)
library(ggplot2)
library(dplyr)
library(data.table)
library(readxl)
library(reshape2)
library(ggrepel)
library(plotly)
library(GeneOverlap) # BiocManager::install("GeneOverlap")
######################################################
# Exporting 3D plots
knitr::knit_hooks$set(webgl = rgl::hook_webgl)
dge_limit <- F # 100
nCores <- 4#parallel::detectCores()
set.seed(2019)
# Load preprocessed data
load("./Data/monocle3_CDS.RData")
cds## class: cell_data_set
## dim: 14827 19144
## metadata(1): cds_version
## assays(1): counts
## rownames(14827): A1BG A2M ... ZZEF1 ZZZ3
## rowData names(1): gene_short_name
## colnames(19144): GTCATTTTCCGCATCT ATAAGAGAGGAGTTGC ...
## CAGTAACCCGCTCGCA ACACCGGTCCGGCCCA
## colData names(23): nGene nUMI ... cell_type cluster_ext_type
## reducedDimNames(2): PCA UMAP
## spikeNames(0):
Detect whether or not to limit DGE analysis to only the top N most variable genes.
# DGE limiter
# dge_limit <- 10
if(dge_limit){
cds_DGE <- cds[var.genes[1:dge_limit],]
print(paste0("Testing only the top ",dge_limit," most variable genes in DGE analysis."))
} else{
cds_DGE <- cds
print(paste0("Testing all ",dim(cds)[1]," genes in DGE analysis."))
} ## [1] "Testing all 14827 genes in DGE analysis."
# DGE distribution type
expression_family <- "quasipoisson" # Recommended by Monocle3 authors for most cases.
within_clusters <- c(1,2)
DT_max <- 1000dge.dx <- monocle3_DGE(cds_DGE = cds_DGE,
variable = "dx",
nCores = nCores,
expression_family = expression_family,
plot_volcano = T,
results_path = "./Results/across_PD.vs.Ctrl.csv") ## [1] "Results file already exists. Importing..."
## Warning: Removed 1197 rows containing missing values (geom_point).
dge.mut <- monocle3_DGE(cds_DGE = cds_DGE,
variable = "dx",
variable_subsets = c("PD","GBA"),
nCores = nCores,
expression_family = expression_family,
plot_volcano = T,
results_path = "./Results/across_PD.vs.GBA.csv") ## [1] "Results file already exists. Importing..."
## Warning in max(neg.log.pvals): no non-missing arguments to max; returning -
## Inf
## Warning in max(abs(dge_sig$estimate)): no non-missing arguments to max;
## returning -Inf
## Warning: Removed 6 rows containing missing values (geom_text_repel).
## Warning: Removed 1 rows containing missing values (geom_hline).
dge.clust <- monocle3_DGE(cds_DGE = cds_DGE,
variable = "Cluster",
variable_subsets = c(1,2),
nCores = nCores,
expression_family = expression_family,
plot_volcano = T,
results_path = "./Results/across_Clust1.vs.Clust2.csv") ## [1] "Results file already exists. Importing..."
## Warning: Removed 2527 rows containing missing values (geom_point).
for (clust in within_clusters){
cat("\n")
cat("#### Cluster ",clust,"\n")
cds_clust <- cds_DGE[,pData(cds_DGE)$Cluster==clust]
dge.dx.clust <- monocle3_DGE(cds_DGE = cds_clust,
variable = "dx",
nCores = nCores,
expression_family = expression_family,
plot_volcano = T,
results_path = paste0("./Results/within.Clust",clust,"_PD.vs.Ctrl.csv") )
createDT_html(head(dge.dx.clust, DT_max)) %>% print()
cat("\n")
}[1] “Results file already exists. Importing…”
## Warning: Removed 1419 rows containing missing values (geom_point).
[1] “Results file already exists. Importing…”
## Warning: Removed 1957 rows containing missing values (geom_point).
for (clust in within_clusters){
cat("\n")
cat("#### Cluster ",clust,"\n")
cds_clust <- cds_DGE[,(pData(cds_DGE)$Cluster==clust)]
dge.mut.clust <- monocle3_DGE(cds_DGE = cds_clust,
variable = "mut",
variable_subsets = c("PD","GBA"),
nCores = nCores,
expression_family = expression_family,
plot_volcano = T,
results_path = paste0("./Results/within.Clust",clust,"_PD.vs.GBA.csv") )
createDT_html(head(dge.mut.clust, DT_max)) %>% print()
cat("\n")
}[1] “Results file already exists. Importing…”
## Warning: Removed 1775 rows containing missing values (geom_point).
[1] “Results file already exists. Importing…”
## Warning: Removed 2359 rows containing missing values (geom_point).
# Cluster-specific markers
marker_res <- top_cluster_markers(cds_DGE,
cluster_list=c(1,2),
genes_to_test_per_group=1000,
save_path= "./Results/cluster_markers.csv")filtered_res <- marker_res %>%
filter(fraction_expressing >= 0.10 & marker_test_q_value <=0.05) %>%
group_by(cell_group) %>% arrange(desc(specificity), desc(pseudo_R2))
createDT(filtered_res)## Warning in instance$preRenderHook(instance): It seems your data is too
## big for client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html